Converting articles from Hugi 12 - 17 to Panorama

Adok/Hugi

What follows is the source code of the program I wrote in order to simplify the task of converting the articles from Hugi 12 - 17 to the format of the Panorama 2 engine that has been used since issue 20. I've included it because it contains some general-purpose functions for text processing using C/C++.

/*
     sr2cd.cpp
     Modifies Hugi articles formatted for Street Raider's engine (#12 - #17)
     so that manual conversion to Chris Dragan's format (since #18, or rather 
     since #20 as there have been some changes) is simplified:
     * Conversion of title and author information - including "capitalizing"
     * Removal of all colour codes
     * Conversion of the image anchors (@IMG)
     * Conversion of the links (@REF) - including "mailto:"
     * Conversion of the image links (@IREF) - including "mailto:"
     * Replacement of various strings (primarily Internet URLs and special characters)
     * Headlines (might need to be manually edited)
     * Conversion of ASCII code encodings (ASC)
     * Marking of program codes
     * Template for sub-headlines

     The original files must have the extension .TXT.
     DIR_ORIG is the working directory.
*/

// Directory that is searched for the original *.txt articles. It is also
// prepended verbatim to every file name that is opened, so it has to end
// with a path separator.
#define DIR_ORIG "c:\\temptxt\\"
// Only used to size the path buffers in main(); it must be at least
// strlen (DIR_ORIG), which is 11 for the value above.
#define DIR_ORIG_LENGTH 15
// Capacity (not counting the terminating '\0') of the line buffers; it is
// also the limit passed to fgets when an article line is read.
#define MAX_TEXT_LENGTH 1000


#include <stdio.h>
#include <stdlib.h>
#include <dir.h>
#include <string.h>
#include <conio.h>


// Auxiliary function for the conversion of title or author information
// (including "capitalizing").
//
// Reads characters from "read" up to and including the next line feed (10),
// or up to the end of the file if no line feed follows.
// * Every character except CR (13) and LF (10) is written to "write".
// * An upper-case letter 'A'..'Z' is converted to lower case unless it is
//   the first character read or directly follows one of the separators
//   space, '-', '/', '(', '"' or a digit. Lower-case input is never
//   upper-cased.
//
// "Return value" (by reference): "tempchar" receives the converted text,
// including any CR/LF that was read, followed by a terminating '\0'.
// Warning: This function doesn't check if enough memory has been reserved
// for the "return string"!
void convert_title_author (char *tempchar, FILE *write, FILE *read)
{
  char  capitalize    = 1;
  int   c;

  do
  {
    // fgetc returns an int so that EOF can be told apart from every valid
    // character; without this check a line that is cut off by the end of
    // the file would never terminate the loop.
    c = fgetc (read);
    if (c == EOF)
    {
      break;
    }

    switch (c)
    {
      case 10:
      case 13:
        // Line-end characters are kept in the string but not written out
        break;
      case ' ':
      case '-':
      case '/':
      case '(':
      case '\"':
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
        // Separator: the next letter starts a new word and keeps its case
        fputc (c, write);
        capitalize = 1;
        break;
      default:
        if (!capitalize && c >= 'A' && c <= 'Z')
        {
          c += 'a' - 'A';
        }
        capitalize = 0;
        fputc (c, write);
    }
    *tempchar = (char) c;
    tempchar ++;
  } while (c != 10);

  *tempchar = '\0';
}


// Moves all characters in a string starting from position "tempchar"
// (including the terminating '\0') to the right by "delta" positions.
// The characters in the gap that opens at "tempchar" are not cleared; the
// caller is expected to overwrite them. The position of the old terminator
// is set to ' ' so that it cannot end the string early if it falls into
// the gap.
// A "delta" of zero or less leaves the string untouched.
// Warning: This function doesn't check if enough memory has been reserved!
void moveright (char *temp, int tempchar, int delta)
{
  int  length       = strlen (temp);

  // Without this guard a delta of 0 would replace the terminator by ' '
  // and leave the string unterminated.
  if (delta <= 0)
  {
    return;
  }

  temp [length + delta] = '\0';
  temp [length] = ' ';

  if (tempchar < length)
  {
    // Source and destination overlap, hence memmove rather than memcpy
    memmove (temp + tempchar + delta, temp + tempchar, length - tempchar);
  }
}


// Shifts the tail of a string towards its beginning: every character from
// position "tempchar" up to and including the terminating '\0' is copied
// "delta" positions to the left, overwriting what was stored there.
void moveleft (char *temp, int tempchar, int delta)
{
  char *from   = temp + tempchar;
  char *to     = from - delta;
  char  moved;

  for (;;)
  {
    moved = *from;
    *to = moved;
    from ++;
    to ++;

    // The terminator is copied as well, then the work is done
    if (moved == '\0')
    {
      break;
    }
  }
}


// Writes the characters of "source" (without its terminating '\0') over the
// target string "temp", beginning at index "tempchar". Nothing is shifted
// and no terminator is appended.
// Return value: Index in the target string just behind the last character
// that was written
int mystrcpy (char *temp, int tempchar, char *source)
{
  char *dest = temp + tempchar;

  for (; *source != '\0'; source ++)
  {
    *dest = *source;
    dest ++;
  }

  return (int) (dest - temp);
}


// Replaces every occurrence of the substring "replace" in "target" by
// "with". The replacement is done in place.
//
// The string is scanned from its end towards its beginning. After a
// replacement the scan continues to the left of it, so the inserted text is
// not searched again from its own start, but a match that begins further
// left may extend into it.
//
// An empty "replace" string leaves "target" unchanged (it would otherwise
// match at the terminator and append "with" to the string).
//
// Warning: This function doesn't check if enough memory has been reserved!
// If "with" is longer than "replace", "target" grows by the difference for
// every occurrence.
void myreplace (char *target, char *replace, char *with)
{
  size_t  replace_length  = strlen (replace),
          with_length     = strlen (with),
          target_length   = strlen (target),
          counter;

  if (replace_length == 0)
  {
    return;
  }

  counter = target_length + 1;
  while (counter)
  {
    counter --;
    if (counter + replace_length <= target_length
        && !strncmp (target + counter, replace, replace_length))
    {
      // Move the rest of the string (including its '\0') to where it has
      // to be after the replacement; the regions may overlap.
      memmove (target + counter + with_length,
               target + counter + replace_length,
               target_length - counter - replace_length + 1);
      memcpy (target + counter, with, with_length);
      target_length = target_length - replace_length + with_length;
    }
  }
}


// Program entry point: converts every file matching DIR_ORIG "*.txt".
//
// For each file found:
// * the input is opened in binary mode; the output file gets the same path
//   with its last character replaced by '0',
// * the first two lines (title and author) are converted by
//   convert_title_author after skipping 8 respectively 9 leading characters,
//   and the article header, the headline and a sub-headline template are
//   written,
// * the remaining lines are processed one at a time: colour codes are
//   removed, ASC encodings decoded, @IMG / @REF / @IREF anchors rewritten
//   and a number of fixed strings replaced.
//
// The output of a line is delayed by one iteration ("prev") because the
// closing tags of a program-code section can only be appended once the
// following line is known.
//
// The program exits with status 1 if no file is found or a file cannot be
// opened.
// Warning: The tag conversions search for the closing '"' without any
// bounds check; malformed anchors and very long lines are not handled.
void main()
{
  char  directory [DIR_ORIG_LENGTH + 3],
       *dir_orig             = DIR_ORIG,
        dir_temp [DIR_ORIG_LENGTH + 259],
        temp [MAX_TEXT_LENGTH + 1],
        prev [MAX_TEXT_LENGTH + 1],
        flag,
        font_code            = 0;
  int   tempchar,
        counter,
        delta;
  FILE *read,
       *write;
  struct ffblk     ffblk;

  clrscr ();

  strcpy (directory, dir_orig);
  strcat (directory, "*.txt");

  printf ("Searching for: %s\n", directory);

  if (findfirst (directory, &ffblk, 0))
  {
    printf ("Error: No files found\n");
    exit (1);
  }

  do
  {
    strcpy (dir_temp, dir_orig);
    strcat (dir_temp, ffblk.ff_name);
    read = fopen (dir_temp, "rb");

    printf ("File name: %s\n",dir_temp);
    if (read == NULL)
    {
      printf ("Error: File not found\n");
      exit (1);
    }

    // The output file name differs from the input file name only in its
    // last character, which becomes '0'
    dir_temp [strlen (dir_temp) - 1] = '0';
    write = fopen (dir_temp, "wb");
    if (write == NULL)
    {
      printf ("Error: Cannot create output file\n");
      fclose (read);
      exit (1);
    }

    // Conversion of title and author information - including "capitalizing"
    // In addition: Headlines (might need manual editing)
    fputs ("<article scheme=default title=\"", write);
    // Skip the 8 characters in front of the title
    for (tempchar = 8; tempchar; tempchar--)
    {
      fgetc (read);
    }
    convert_title_author (prev, write, read);
    fputs ("\" author=\"", write);
    // Skip the 9 characters in front of the author
    for (tempchar = 9; tempchar; tempchar--)
    {
      fgetc (read);
    }
    convert_title_author (temp, write, read);
    fputs ("\">\r\n\r\n<p align=c fl=0>\r\n<font style=headline_def>\r\n", write);      
    fputs (prev, write);
    fputs ("</font>\r\n<p>\r\n<link external=mailto: ***>\r\n<i>\r\n", write);
    fputs (temp, write);
    fputs ("</i>\r\n</link>\r\n\r\n<p spacing 14>\r\n"
           "<p spacing 0 align j>\r\n\r\n\r\n\r\n\r\n\r\n", write);

    // Template for sub-headlines
    fputs ("<p spacing 15>\r\n<font style subheadline_def>\r\n\r\n"
           "</font>\r\n\r\n<p spacing 16>\r\n\r\n", write);

    // Actual article
    // "temp" holds the line that is being processed, "prev" the processed
    // line that is written next. The first pass works on an empty line.
    strcpy (temp, "");
    do
    {       
      // Replacement of various strings
      myreplace (temp, "<", "&emp;lt;");

      // Marking of program codes
      // NOTE(review): the character literal compared with temp [0] is empty
      // in this copy of the source - the code marker character appears to
      // have been lost and has to be restored before this compiles.
      if (!font_code)
      {
        if (temp [0] == '' && temp [1] == '0')
        {
          fputs ("<p spacing=16 fl=0 align=j>\r\n"
                 "<font style=code>\r\n<pre>", write);
          font_code = -1;
        }
      }

      // Removal of all colour codes and decoding of ASC encodings.
      // The line is compacted in place: "tempchar" is the write position,
      // "tempchar + delta" the read position.
      // NOTE(review): same lost marker character as above.
      tempchar = 0;
      delta = 0;
      do
      {
        if (temp [tempchar + delta] == '')
        {
          delta ++;
          switch (temp [tempchar + delta])
          {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
              // Colour code: skip the digit as well
              delta ++;
              break;
            case 'A':
              // Conversion of ASCII code encodings (ASC)
              // Skip "ASC" and read the three decimal digits that follow;
              // the decoded character replaces the last digit so that the
              // copy statement below picks it up.
              delta += 3;
              counter = temp [tempchar + delta] - '0';
              counter *= 10;
              delta ++;
              counter += temp [tempchar + delta] - '0';
              counter *= 10;
              delta ++;
              counter += temp [tempchar + delta] - '0';
              temp [tempchar + delta] = (char) counter;
              break;
          }
        }
        temp [tempchar] = temp [tempchar + delta];
        tempchar ++;
      } while (temp [tempchar - 1] != '\0');

      // Conversion of the '@' anchors
      tempchar = 0;
      do
      {
        if (temp [tempchar] == '@')
        {
          // Conversion of the image anchors (@IMG)
          if (temp [tempchar + 1] == 'I' &&
              temp [tempchar + 2] == 'M' &&
              temp [tempchar + 3] == 'G' &&
              temp [tempchar + 4] == '=')
          {
            moveright (temp, tempchar + 6, 14);
            // @IMG="
            // <image file "images/
            //       14 byte difference
            tempchar = mystrcpy (temp, tempchar, "<image file=\"images\\");
            while (temp [tempchar] != '\"')
            {
              tempchar ++;
            }
            tempchar ++;
            moveright (temp, tempchar, 1);
            temp [tempchar] = '>';
          }

          // Conversion of the links (@REF) - including "mailto:"
          // Accepts both "@REF=" and "@REF#="; the parentheses are needed
          // because && binds more tightly than ||.
          else
          if (temp [tempchar + 1] == 'R' &&
              temp [tempchar + 2] == 'E' &&
              temp [tempchar + 3] == 'F' &&
              (temp [tempchar + 4] == '=' ||
               (temp [tempchar + 4] == '#' &&
                temp [tempchar + 5] == '=')))
          {
            // "flag" becomes non-zero if the link target contains an '@',
            // in which case it is treated as an e-mail address
            flag = 0;
            if (temp [tempchar + 4] == '#')
            {
              // Drop the '#' so that both forms look alike from here on
              moveleft (temp, tempchar + 5, 1);
            }
            counter = tempchar + 7;
            while (temp [counter] != '\"')
            {
              counter ++;
            }
            counter ++;
            if (temp [counter] == ':')
            {
              // Form with two quoted strings separated by ':' - the first
              // one is saved in "prev" and later used as the link text
              for (delta = tempchar + 6; delta < counter - 1; delta ++)
              {
                prev [delta - tempchar - 6] = temp [delta];
              }
              prev [delta - tempchar - 6] = '\0';

              delta = counter + 2;
              do
              {
                temp [tempchar + 6 + delta - counter - 2] = temp [delta];
                if (temp [delta] == '@')
                {
                  flag = -1;
                }
                delta ++;
              } while (temp [delta - 1] != '\"');

              if (flag)
              {
                moveright (temp, tempchar + 6, 17);
                // @REF="
                // <link external "mailto:
                //       17 byte difference
                mystrcpy (temp, tempchar, "<link external=\"mailto:");
                tempchar += 17;
              }
              else
              {
                moveright (temp, tempchar + 6, 10);
                // @REF="
                // <link external "
                //       10 byte difference
                mystrcpy (temp, tempchar, "<link external=\"");
                tempchar += 10;
              }

              tempchar += 6 + delta - counter - 2;
              temp [tempchar] = '>';

              tempchar = mystrcpy (temp, tempchar + 1, prev);
              moveright (temp, tempchar, 7 - 2);
              // </link>
              // 7 byte
              tempchar = mystrcpy (temp, tempchar, "</link>") - 1;
            }
            else
            {
              // Form with a single quoted string
              delta = tempchar + 6;
              while (temp [delta] != '\"')
              {
                if (temp [delta] == '@')
                {
                  flag = -1;
                }
                delta ++;
              }
              if (flag)
              {
                moveright (temp, tempchar + 6, 17);
                // @REF="
                // <link external "mailto:
                //       17 byte difference
                tempchar = mystrcpy (temp, tempchar, "<link external=\"mailto:");
                delta = 17 + counter - tempchar;
                moveright (temp, tempchar, delta);
                tempchar = counter + 17;
              }
              else
              {
                moveright (temp, tempchar + 6, 10);
                // @REF="
                // <link external "
                //       10 byte difference
                tempchar = mystrcpy (temp, tempchar, "<link external=\"");
                delta = 10 + counter - tempchar;
                moveright (temp, tempchar, delta);
                tempchar = counter + 10;
              }             
              moveright (temp, tempchar, 1);
              temp [tempchar] = '>';                    
              tempchar += delta;
              moveright (temp, tempchar, 7 - 1);
              // </link>
              // 7 byte
              tempchar = mystrcpy (temp, tempchar, "</link>") - 1;
            }
          }

          // Conversion of the image links (@IREF) - including "mailto:"
          else
          if (temp [tempchar + 1] == 'I' &&
              temp [tempchar + 2] == 'R' &&
              temp [tempchar + 3] == 'E' &&
              temp [tempchar + 4] == 'F' &&
              temp [tempchar + 5] == '=')
          {
            flag = 0;

            // Save the first quoted string in "prev"; it is used as the
            // image file name further below
            delta = tempchar + 7;
            do
            {
              prev [delta - tempchar - 7] = temp [delta];
              delta ++;
            } while (temp [delta] != '\"');
            prev [delta - tempchar - 7] = '\0';
            
            delta += 3;

            // Look for an '@' in the second quoted string (e-mail address)
            counter = delta;
            while (temp [counter] != '\"')
            {
              if (temp [counter] == '@')
              {
                flag = -1;
              }
              counter ++;
            }

            // Make the text in front of the second string exactly as long
            // as the tag opening that replaces it (23 or 16 characters)
            if (flag)
            {
              if (delta - tempchar < 23)
              {
                moveright (temp, delta, 23 - delta + tempchar);
                counter += 23 - delta + tempchar;
              }
              else if (delta - tempchar > 23)
              {
                moveleft (temp, delta, delta - tempchar - 23);
                counter -= delta - tempchar - 23;
              }
              mystrcpy (temp, tempchar, "<link external=\"mailto:");
            }
            else
            {
              if (delta - tempchar < 16)
              {
                moveright (temp, delta, 16 - delta + tempchar);
                counter += 16 - delta + tempchar;
              }
              else if (delta - tempchar > 16)
              {
                moveleft (temp, delta, delta - tempchar - 16);
                counter -= delta - tempchar - 16;
              }
              mystrcpy (temp, tempchar, "<link external=\"");
            }
            
            counter ++;

            moveright (temp, counter, 5);
            counter = mystrcpy (temp, counter, " desc=");
            
            counter += 3;
            while (temp [counter - 1] != '\"')
            {
              counter ++;
            }

            moveright (temp, counter, 21 + 9 + strlen (prev));
            counter = mystrcpy (temp, counter, "><image file=\"images\\");
            counter = mystrcpy (temp, counter, prev);
            tempchar = mystrcpy (temp, counter, "\"></link>") - 1;
          }
        }
        tempchar ++;
      } while (temp [tempchar - 1] != '\0');

      // Replacement of various strings
      myreplace (temp, "adok@blackbox.at", "cdvolko@gmx.net");
      myreplace (temp, "hugi@netway.at", "cdvolko@gmx.net");
      myreplace (temp, "http://home.pages.de/~hugi/", "http://www.hugi.de/");
      myreplace (temp, "http://hugi.home.pages.de/", "http://www.hugi.de/");
      myreplace (temp, "http://privat.schlund.de/hugi/", "http://www.hugi.de/");

      myreplace (temp, ".raw", ".jpg");

      // NOTE(review): the search strings of the following calls are empty
      // in this copy of the source. Presumably each of them held a special
      // character that was lost, just like the replacement text between
      // <font charset w> and </font> - confirm against the original file.
      myreplace (temp, "", "<font charset w></font>");
      myreplace (temp, "", "<font charset w></font>");
      myreplace (temp, "", "<font charset w></font>");
      myreplace (temp, "", "<font charset w></font>");
      myreplace (temp, "", "<font charset w></font>");
      myreplace (temp, "", "<font charset w></font>");
      myreplace (temp, "", "<font charset w></font>");
            
      myreplace (temp, "", "-");
      myreplace (temp, "", "<image file images\\bullet.bmp><space width 0>");
      myreplace (temp, "", "-");
      myreplace (temp, "", "-");
      
      // Keep the processed line and fetch the next raw line
      strcpy (prev, temp);
      fgets (temp, MAX_TEXT_LENGTH, read);

      // Marking of program codes
      // The code section ends as soon as the next line does not start with
      // the code marker.
      // NOTE(review): same lost marker character as above.
      if (font_code)
      {
        if (temp [0] != '' || temp [1] != '0')
        {
          // Remove the last two characters (the line end) before the
          // closing tags are appended - but only if the line has them
          counter = strlen (prev);
          if (counter >= 2)
          {
            prev [counter - 2] = '\0';
          }
          strcat (prev, "</pre>\r\n</font>\r\n<p>\r\n");
          font_code = 0;
        }
      }

      fputs (prev, write);

      flag = feof (read) && !strcmp (prev, temp);
      /*
        Explanation:
        
        There are two possible cases regarding the end of file (<EOF>):
        1. TEXT<EOF>
        2. TEXT<CR><LF><EOF>
        
        In case 1, fgets reads a new line and bumps into <EOF>. Therefore
        feof (read) == TRUE. But since it's a new line, it still has to be
        processed. Therefore, flag = FALSE. On the next call of fgets, we have
        case 2.

        In case 2, fgets bumps into <EOF> (only) after the processing of
        the last TEXT line. That's why temp remains unchanged. But since the
        last text line must not be printed again, the contents of prev and temp
        will be compared. If they are the same, flag = TRUE and the processing
        of the file ends.
      */
    } while (!flag);

    fclose (write);
    fclose (read);
    // findnext returns non-zero when there are no more matching files
    flag = findnext (&ffblk);
    
  } while (!flag);
}

Adok/Hugi